#################################
# REPLICATION: ONLINE SURVEY ####
# Handi Li, June 2025    
#################################

## Load Packages ####

# Packages attached by this script. Any package that is not installed yet is
# installed first, then every package is attached.
packages <- c("readr","tidyverse","fixest","ggplot2", "gridExtra","kableExtra")

for (pkg in packages) {
  # requireNamespace() only probes for the package (no warning when it is
  # absent); library() then attaches it and stops with an error if the
  # package is still unavailable after the install attempt.
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
  library(pkg, character.only = TRUE)
}

## Load Data ####

# Read the survey responses, decoding the file as UTF-8.
# NOTE(review): the path is absolute from the filesystem root; it presumably
# has to be pointed at the local copy of the replication data -- confirm.
d.online_new <- read_csv("/complete_survey_data.csv", locale = locale(encoding = "UTF-8"))

## Balance ####

# Columns used for the balance tables. The first 27 entries are the variables
# compared across arms; the last three are the grouping variable and the two
# treatment indicators used to split and label the comparisons.
names <- c(
  "gender", "age", "rural", "econ_perception", "party", "education",
  "student", "unemployed", "public_work", "income", "know_official",
  "know_lawyer", "socpol_news", "know_ogi", "use_ogi", "trust_ogi",
  "dispute_experience", "choose_legal", "choose_political", "choose_protest",
  "important_ogi", "important_connection", "able_connection",
  "attention", "Duration", "Mobile", "survey_times",
  "group", "positive_treat", "noinfo_treat"
)

# Keep only rows that are complete on every listed column.
samp.bal <- data.frame(na.omit(d.online_new[names]))

# Presumably defines balance(), which is called below -- confirm.
source("/balance_test.r")

# One data set per pairwise comparison of groups.
d1 <- samp.bal %>% filter(group == 1 | group == 2)
d2 <- samp.bal %>% filter(group == 1 | group == 3)
d3 <- samp.bal %>% filter(group == 2 | group == 3)

# Positions of the compared variables within `names` / the data frames.
covariate_idx <- 1:27

kable(balance(d1[covariate_idx], d1$noinfo_treat, names[covariate_idx]),
      format = "rst")
kable(balance(d2[covariate_idx], d2$positive_treat, names[covariate_idx]),
      format = "rst")
kable(balance(d3[covariate_idx], d3$positive_treat, names[covariate_idx]),
      format = "rst")

## Results ####

# Rescale answers: (x - 1) / 4 sends a value of 1 to 0 and a value of 5 to 1.
# NOTE(review): the columns are picked by position, so this step depends on
# the column order of the loaded CSV.
rescale.cols <- c(10:11, 13, 15:16, 19:22, 26:29, 34:40)
d.online_new[, rescale.cols] <- lapply(
  d.online_new[, rescale.cols],
  function(x) (x - 1) / 4
)

# Create outcome variables for DiD: outcome minus the corresponding
# choose_* column.
d.online_new$diff_legal <- with(d.online_new, legal - choose_legal)
d.online_new$diff_political <- with(d.online_new, political - choose_political)

#### Full result for Table 1 ####

# Table 1 models. Each outcome is regressed on positive_treat within groups
# 1 and 3: first on the treatment indicator alone, then (suffix .c) with the
# covariates and province dummies appended to the same call through update().
add.controls <- . ~ . +
  gender + age + rural + econ_perception + party + education +
  student + unemployed + public_work + income + know_official + know_lawyer +
  socpol_news + know_ogi + use_ogi + trust_ogi + dispute_experience +
  choose_legal + choose_political + choose_protest + important_ogi +
  important_connection + able_connection + attention +
  factor(province)

m.content.legal <- lm(legal ~ positive_treat,
                      data = d.online_new,
                      subset = (group == 1 | group == 3))
m.content.legal.c <- update(m.content.legal, add.controls)

m.content.political <- lm(political ~ positive_treat,
                          data = d.online_new,
                          subset = (group == 1 | group == 3))
m.content.political.c <- update(m.content.political, add.controls)

m.content.df_legal <- lm(diff_legal ~ positive_treat,
                         data = d.online_new,
                         subset = (group == 1 | group == 3))
m.content.df_legal.c <- update(m.content.df_legal, add.controls)

m.content.df_political <- lm(diff_political ~ positive_treat,
                             data = d.online_new,
                             subset = (group == 1 | group == 3))
m.content.df_political.c <- update(m.content.df_political, add.controls)

#### Full result for Table 2 ####

# Table 2 models. Same design as above: positive_treat within groups 1 and 3,
# without controls and then (suffix .c) with the covariates and province
# dummies added to the bivariate call through update().
add.controls <- . ~ . +
  gender + age + rural + econ_perception + party + education +
  student + unemployed + public_work + income + know_official + know_lawyer +
  socpol_news + know_ogi + use_ogi + trust_ogi + dispute_experience +
  choose_legal + choose_political + choose_protest + important_ogi +
  important_connection + able_connection + attention +
  factor(province)

m.content.take <- lm(take ~ positive_treat,
                     data = d.online_new,
                     subset = (group == 1 | group == 3))
m.content.take.c <- update(m.content.take, add.controls)

m.content.institution1 <- lm(protest_later ~ positive_treat,
                             data = d.online_new,
                             subset = (group == 1 | group == 3))
m.content.institution1.c <- update(m.content.institution1, add.controls)

m.content.institution2 <- lm(institution_first1 ~ positive_treat,
                             data = d.online_new,
                             subset = (group == 1 | group == 3))
m.content.institution2.c <- update(m.content.institution2, add.controls)

#### Full result for Table 3 ####

# Table 3 models. Four outcomes regressed on positive_treat within groups
# 1 and 3; the .c variants add the covariates and province dummies to the
# bivariate call through update().
add.controls <- . ~ . +
  gender + age + rural + econ_perception + party + education +
  student + unemployed + public_work + income + know_official + know_lawyer +
  socpol_news + know_ogi + use_ogi + trust_ogi + dispute_experience +
  choose_legal + choose_political + choose_protest + important_ogi +
  important_connection + able_connection + attention +
  factor(province)

m.cm.conf <- lm(confidence_institution ~ positive_treat,
                data = d.online_new,
                subset = (group == 1 | group == 3))
m.cm.conf.c <- update(m.cm.conf, add.controls)

m.cm.info <- lm(information_institution ~ positive_treat,
                data = d.online_new,
                subset = (group == 1 | group == 3))
m.cm.info.c <- update(m.cm.info, add.controls)

m.cm.fair_legal <- lm(fair_legal ~ positive_treat,
                      data = d.online_new,
                      subset = (group == 1 | group == 3))
m.cm.fair_legal.c <- update(m.cm.fair_legal, add.controls)

m.cm.fair_political <- lm(fair_political ~ positive_treat,
                          data = d.online_new,
                          subset = (group == 1 | group == 3))
m.cm.fair_political.c <- update(m.cm.fair_political, add.controls)

#### Full result for Table 4 ####

# Check share of social desirability: frequency tables of the two indicators.
table(d.online_new$post_criticism) # real experience publicly criticizing gov
table(d.online_new$noanswer)       # no answer

# Table 4 models. Each outcome is regressed on positive_treat within groups
# 1 and 3; the .c variants add the covariates and province dummies to the
# bivariate call through update().
add.controls <- . ~ . +
  gender + age + rural + econ_perception + party + education +
  student + unemployed + public_work + income + know_official + know_lawyer +
  socpol_news + know_ogi + use_ogi + trust_ogi + dispute_experience +
  choose_legal + choose_political + choose_protest + important_ogi +
  important_connection + able_connection + attention +
  factor(province)

m.cost <- lm(lowcost_legal ~ positive_treat,
             data = d.online_new,
             subset = (group == 1 | group == 3))
m.cost.c <- update(m.cost, add.controls)

m.social_desire1 <- lm(post_criticism ~ positive_treat,
                       data = d.online_new,
                       subset = (group == 1 | group == 3))
m.social_desire1.c <- update(m.social_desire1, add.controls)

m.social_desire2 <- lm(noanswer ~ positive_treat,
                       data = d.online_new,
                       subset = (group == 1 | group == 3))
m.social_desire2.c <- update(m.social_desire2, add.controls)

#### Full result for Table C.1 ####

# Table C.1 models. Each outcome is regressed on noinfo_treat within groups
# 1 and 2: first on the treatment indicator alone, then (suffix .c) with the
# covariates and province dummies appended to the same call through update().
add.controls <- . ~ . +
  gender + age + rural + econ_perception + party + education +
  student + unemployed + public_work + income + know_official + know_lawyer +
  socpol_news + know_ogi + use_ogi + trust_ogi + dispute_experience +
  choose_legal + choose_political + choose_protest + important_ogi +
  important_connection + able_connection + attention +
  factor(province)

m.announce.legal <- lm(legal ~ noinfo_treat,
                       data = d.online_new,
                       subset = (group == 1 | group == 2))
m.announce.legal.c <- update(m.announce.legal, add.controls)

m.announce.political <- lm(political ~ noinfo_treat,
                           data = d.online_new,
                           subset = (group == 1 | group == 2))
m.announce.political.c <- update(m.announce.political, add.controls)

m.announce.df_legal <- lm(diff_legal ~ noinfo_treat,
                          data = d.online_new,
                          subset = (group == 1 | group == 2))
m.announce.df_legal.c <- update(m.announce.df_legal, add.controls)

m.announce.df_political <- lm(diff_political ~ noinfo_treat,
                              data = d.online_new,
                              subset = (group == 1 | group == 2))
m.announce.df_political.c <- update(m.announce.df_political, add.controls)

#### Full Result for Table C.2 ####

# Table C.2 models. noinfo_treat within groups 1 and 2, without controls and
# then (suffix .c) with the covariates and province dummies added to the
# bivariate call through update().
add.controls <- . ~ . +
  gender + age + rural + econ_perception + party + education +
  student + unemployed + public_work + income + know_official + know_lawyer +
  socpol_news + know_ogi + use_ogi + trust_ogi + dispute_experience +
  choose_legal + choose_political + choose_protest + important_ogi +
  important_connection + able_connection + attention +
  factor(province)

m.announce.take <- lm(take ~ noinfo_treat,
                      data = d.online_new,
                      subset = (group == 1 | group == 2))
m.announce.take.c <- update(m.announce.take, add.controls)

m.announce.institution1 <- lm(protest_later ~ noinfo_treat,
                              data = d.online_new,
                              subset = (group == 1 | group == 2))
m.announce.institution1.c <- update(m.announce.institution1, add.controls)

m.announce.institution2 <- lm(institution_first1 ~ noinfo_treat,
                              data = d.online_new,
                              subset = (group == 1 | group == 2))
m.announce.institution2.c <- update(m.announce.institution2, add.controls)

#### Full result for Table C.3 ####

# Table C.3 models. Four outcomes regressed on noinfo_treat within groups
# 1 and 2; the .c variants add the covariates and province dummies.
m.announce.conf <- lm(confidence_institution ~ noinfo_treat, data = d.online_new, subset = (group == 1 | group == 2))
m.announce.conf.c <- lm(confidence_institution ~ noinfo_treat +
                          gender + age + rural + econ_perception + party + education +
                          student + unemployed + public_work + income + know_official + know_lawyer +
                          socpol_news + know_ogi + use_ogi + trust_ogi + dispute_experience +
                          choose_legal + choose_political + choose_protest + important_ogi +
                          important_connection + able_connection + attention +
                          factor(province),
                        data = d.online_new, subset = (group == 1 | group == 2))

m.announce.info <- lm(information_institution ~ noinfo_treat, data = d.online_new, subset = (group == 1 | group == 2))
m.announce.info.c <- lm(information_institution ~ noinfo_treat +
                          gender + age + rural + econ_perception + party + education +
                          student + unemployed + public_work + income + know_official + know_lawyer +
                          socpol_news + know_ogi + use_ogi + trust_ogi + dispute_experience +
                          choose_legal + choose_political + choose_protest + important_ogi +
                          important_connection + able_connection + attention +
                          factor(province),
                        data = d.online_new, subset = (group == 1 | group == 2))

# The regressor is noinfo_treat, the same treatment indicator used by every
# other model in this table.
m.announce.fair_legal <- lm(fair_legal ~ noinfo_treat, data = d.online_new, subset = (group == 1 | group == 2))
m.announce.fair_legal.c <- lm(fair_legal ~ noinfo_treat +
                                gender + age + rural + econ_perception + party + education +
                                student + unemployed + public_work + income + know_official + know_lawyer +
                                socpol_news + know_ogi + use_ogi + trust_ogi + dispute_experience +
                                choose_legal + choose_political + choose_protest + important_ogi +
                                important_connection + able_connection + attention +
                                factor(province),
                              data = d.online_new, subset = (group == 1 | group == 2))

m.announce.fair_political <- lm(fair_political ~ noinfo_treat, data = d.online_new, subset = (group == 1 | group == 2))
m.announce.fair_political.c <- lm(fair_political ~ noinfo_treat +
                                    gender + age + rural + econ_perception + party + education +
                                    student + unemployed + public_work + income + know_official + know_lawyer +
                                    socpol_news + know_ogi + use_ogi + trust_ogi + dispute_experience +
                                    choose_legal + choose_political + choose_protest + important_ogi +
                                    important_connection + able_connection + attention +
                                    factor(province),
                                  data = d.online_new, subset = (group == 1 | group == 2))


## Attrition and Bounds (Tables F.1, F.2) ####

# Share of rows in d.online_new whose value in the given column is missing.
share_missing <- function(x) {
  sum(is.na(x)) / nrow(d.online_new)
}

attrition_rate_legal       <- share_missing(d.online_new$legal)
attrition_rate_political   <- share_missing(d.online_new$political)
attrition_rate_direct      <- share_missing(d.online_new$protest_later)
attrition_rate_rank_legal  <- share_missing(d.online_new$rank_legal) # rate = 27%
attrition_rate_rank        <- share_missing(d.online_new$institution_first1) # rate = 27%
attrition_rate_fair1       <- share_missing(d.online_new$fair_legal)
attrition_rate_fair2       <- share_missing(d.online_new$fair_political)
attrition_rate_confidence  <- share_missing(d.online_new$confidence_institution)
attrition_rate_information <- share_missing(d.online_new$information_institution)
attrition_rate_take        <- share_missing(d.online_new$take)
attrition_rate_lowcost     <- share_missing(d.online_new$lowcost_legal)

# Apply Lee Bounds

# Work on the group 1 vs. group 3 comparison only.
d.online_check <- d.online_new %>% filter(group == 1 | group == 3)

# Row masks reused below.
answered <- !is.na(d.online_check$institution_first1)
treated <- d.online_check$positive_treat == 1
control <- d.online_check$positive_treat == 0

# Share of non-missing institution_first1 within each arm, and the relative
# gap between the two shares.
pi1 <- nrow(d.online_check[answered & treated, ]) /
  nrow(d.online_check[treated, ])
pi0 <- nrow(d.online_check[answered & control, ]) /
  nrow(d.online_check[control, ])
q <- (pi0 - pi1) / pi0

# Number of control rows times q.
nrow(d.online_check[control, ]) * q

# Drop a random set of control rows with outcome 0 (data set _upp) or with
# outcome 1 (data set _low). The two draws happen in this order after seeding.
# NOTE(review): the trim count of 6 is hard-coded; presumably it was read off
# the product printed above -- confirm if the data change.
set.seed(2424)
n.trim <- 6
drop.upp <- sample(which(d.online_check$institution_first1 == 0 & control),
                   n.trim)
d.online_check_upp <- d.online_check[-drop.upp, ]
drop.low <- sample(which(d.online_check$institution_first1 == 1 & control),
                   n.trim)
d.online_check_low <- d.online_check[-drop.low, ]

# Specifications re-estimated on each trimmed data set: treatment only, and
# treatment plus the covariates and province dummies.
f.bound <- institution_first1 ~ positive_treat
f.bound.c <- update(
  f.bound,
  . ~ . +
    gender + age + rural + econ_perception + party + education +
    student + unemployed + public_work + income + know_official + know_lawyer +
    socpol_news + know_ogi + use_ogi + trust_ogi + dispute_experience +
    choose_legal + choose_political + choose_protest + important_ogi +
    important_connection + able_connection + attention +
    factor(province)
)

summary(feols(f.bound, data = d.online_check_upp))

summary(feols(f.bound.c, data = d.online_check_upp))

summary(feols(f.bound, data = d.online_check_low))

summary(feols(f.bound.c, data = d.online_check_low))

## Text analysis of the open question (Appendix D) ####

library(jiebaR) # may require remote installation from Git
library(stm)
library(tidytext)

# Segment every open-ended answer into tokens with a default jiebaR worker;
# the result is a list with one token vector per row of d.online_new.
tokenizer <- worker()

text_tok <- lapply(d.online_new$reason_help_institution, function(x) segment(x, tokenizer))

# NOTE(review): "chs" looks like a Windows locale name; on other platforms
# this call presumably fails with a warning -- confirm.
Sys.setlocale(locale = "chs")

# Give every respondent a row-number id and move it to the first column.
d.online.text <- d.online_new %>%
  mutate(unique_id = row_number()) %>%
  select(unique_id, everything())

d_text <- tibble(unique_id = d.online.text$unique_id,
                 text_tok = text_tok)
d_text1 <- d_text %>% select(-text_tok)

# One row per (respondent, distinct token) with its count. lengths() tests
# each respondent's token vector; length() would return the length of the
# whole list column and never filter anything out.
d_text2 <- d_text %>%
  filter(lengths(text_tok) > 0) %>%
  unnest(text_tok) %>%
  group_by(unique_id, text_tok) %>%
  count()

# Re-attach respondents without tokens (their text_tok becomes NA).
d_text <- left_join(d_text1, d_text2, by = "unique_id")

# Collapse each respondent's distinct tokens into one space-separated string.
d_text_merge <- d_text %>%
  group_by(unique_id) %>%
  summarise(words = str_c(text_tok, collapse = " "))

data <- left_join(d_text_merge, d.online.text, by = "unique_id")

# Columns that must be non-missing for a row to enter the topic model.
stm.required <- c(
  "gender", "age", "rural", "econ_perception", "party", "education",
  "student", "unemployed", "public_work", "know_official", "know_lawyer",
  "socpol_news", "use_ogi", "trust_ogi", "dispute_experience",
  "able_connection", "attention", "reason_help_institution"
)
data_cov <- data %>% tidyr::drop_na(all_of(stm.required))

# Build the stm corpus from the space-separated token strings, keeping words
# of at least two characters and carrying the full row as metadata.
processed <- textProcessor(
  documents = data_cov$words,
  metadata = data_cov,
  wordLengths = c(2, Inf)
)

out <- prepDocuments(
  documents = processed$documents,
  vocab = processed$vocab,
  meta = processed$meta
)
docs <- out$documents
vocab <- out$vocab
meta <- out$meta

# Topic prevalence is modelled on the group factor plus covariates; the same
# specification is used for both the 5-topic and the 10-topic model.
prevalence.fml <- ~ factor(group) +
  gender + age + rural + econ_perception + party + education +
  student + unemployed + public_work + know_official + know_lawyer +
  socpol_news + use_ogi + trust_ogi + dispute_experience +
  able_connection + attention

# --- 5-topic model -----------------------------------------------------
m.test.cov <- stm(
  documents = out$documents,
  vocab = out$vocab,
  K = 5,
  prevalence = prevalence.fml,          # with covariates
  max.em.its = 200,
  data = out$meta,
  init.type = "Spectral",
  seed = 20221
)

labels.k5 <- c("know how to:", "confidence:", "evidence:", "fairness:",
               "protect rights:")

plot(m.test.cov, type = "summary", n = 5, labeltype = "frex",
     topic.names = labels.k5,
     text.cex = 2, xlim = c(0, 0.6), cex.lab = 2, cex.axis = 2, cex.main = 2)

plot(m.test.cov, type = "labels", labeltype = "frex",
     topic.names = labels.k5,
     text.cex = 2, cex.lab = 2, cex.axis = 2, cex.main = 2)

# --- 10-topic model (overwrites m.test.cov) ------------------------------
m.test.cov <- stm(
  documents = out$documents,
  vocab = out$vocab,
  K = 10,
  prevalence = prevalence.fml,          # with covariates
  max.em.its = 200,
  data = out$meta,
  init.type = "Spectral",
  seed = 20222
)

# Only five of the ten topics are plotted, in this order.
shown.k10 <- c(9, 6, 4, 8, 3)
labels.k10 <- c("fairness:", "monitor:", "evidence:", "protect rights:",
                "rule by law:")

plot(m.test.cov, type = "summary", n = 5, topics = shown.k10,
     labeltype = "frex", topic.names = labels.k10,
     text.cex = 2, cex.lab = 2, cex.axis = 2, cex.main = 2)

plot(m.test.cov, type = "labels", labeltype = "frex", topics = shown.k10,
     topic.names = labels.k10,
     text.cex = 2, cex.lab = 2, cex.axis = 2, cex.main = 2)


#Sys.setlocale("LC_ALL", "English_United States.UTF-8") # back to your locale
